************************************

* CZ characteristics from analytic data
*
* Reduces the cleaned physician panel to one row per cz90. For every income
* measure in the loop below it stores the within-CZ mean (mean_ prefix) and
* the number of non-missing observations (obs_ prefix) for four samples:
*   allage_ally : all rows
*   allage_2017 : rows with year == 2017
*   4055_ally   : rows with age between 40 and 55 inclusive
*   4055_2017   : rows meeting both restrictions
* The result is saved with cz90 renamed to cz.

use "${clean_data}/panel_physicians_clean.dta", clear 

* count_docs_ally and count_docs_2017 are copied from the ptotinc observation
* counts, so ptotinc must be non-missing on every row.
assert !missing(ptotinc)

* Row filters that define the three restricted samples.
local cond_allage_2017 "year == 2017"
local cond_4055_ally   "inrange(age, 40, 55)"
local cond_4055_2017   "inrange(age, 40, 55) & year == 2017"

foreach inc in ptotinc logptotinc aginc logaginc {
	
	* Full sample: the statistic is defined on every row of the CZ already.
	gegen mean_`inc'_allage_ally = mean(`inc'), by(cz90)
	gegen obs_`inc'_allage_ally = count(`inc'), by(cz90)
	
	foreach smp in allage_2017 4055_ally 4055_2017 {
		
		* Step 1: compute the statistic on the rows that pass the filter.
		* The x-prefixed helpers are missing on all other rows.
		gegen xmean_`inc'_`smp' = mean(`inc') if `cond_`smp'', by(cz90)
		gegen xobs_`inc'_`smp' = count(`inc') if `cond_`smp'', by(cz90)
		
		* Step 2: copy the within-CZ constant onto every row of the CZ.
		* max() is used for all samples. Taking count() of the helper would
		* count rows that pass the filter rather than non-missing income
		* values. A CZ with no qualifying rows ends up missing, not 0.
		gegen mean_`inc'_`smp' = max(xmean_`inc'_`smp'), by(cz90)
		gegen obs_`inc'_`smp' = max(xobs_`inc'_`smp'), by(cz90)
	}

}

gen count_docs_ally = obs_ptotinc_allage_ally
gen count_docs_2017 = obs_ptotinc_allage_2017

* Only CZ-constant variables are kept (the x-prefixed helpers do not match
* these patterns), so dropping duplicates leaves one row per cz90.
keep cz90 mean* obs_* count_*
duplicates drop
isid cz90 
sort cz90 
rename cz90 cz
save "${intermediate_data}/cz_characteristics/_cz_characteristics_panel_physicians.dta", replace

************************************

* County characteristics from AHRF data
*
* Loads the needed AHRF 2019 fields, builds the county identifier cfips,
* converts provider, eligibility and bed counts to rates per 100,000
* population, and saves the county-level file.

use f00008 f12424 f00010 f00011 f00012 f0002013 f1467717 f0885717 ///
	f1419612 f1319117 f0892117 f1198417 f1547117 ///
	using "${raw_data}/AHRF2019/ahrf2019.dta", clear

* cfips is the concatenation of f00011 and f00012.
* NOTE(review): presumably the state and county FIPS pieces; confirm.
gen cfips = f00011 + f00012
drop f00011 f00012

rename	(f00008 f12424 f00010 f0002013 f1467717 f0885717 f1419612 f1319117 f0892117 f1198417 f1547117) ///
	(state stateabrv county ruralurban npcpdocs ntotaldocs medicaidelig medicareelig beds population pop_under65)	
order state stateabrv county cfips ruralurban npcpdocs ntotaldocs ///
	medicaidelig medicareelig beds population pop_under65

destring ruralurban, replace 

* Docs that are not counted as primary care.
gen nspecdocs = ntotaldocs - npcpdocs

* Replace each level with its rate per 100,000 population.
local per_capita_vars nspecdocs npcpdocs medicaidelig medicareelig beds
foreach v of local per_capita_vars {
	gen `v'_pc = (`v'/population) * 100000
	drop `v'
}

gen pop_over65 = population - pop_under65

* Drop rows whose two-digit cfips prefix is numerically above 56. A missing
* prefix also satisfies this condition in Stata and is dropped as well.
* NOTE(review): presumably this removes territories; confirm.
gen todrop = substr(cfips, 1, 2)
destring todrop, replace
drop if todrop > 56

save "${intermediate_data}/cz_characteristics/_countyhealthcare.dta", replace

************************************

* County characteristics from County Health Rankings data
*
* Reads the "Ranked Measure Data" sheet (header row in row 2), shortens two
* variable names so cfips can serve as the county merge key, and saves.

import excel using "${raw_data}/cz_characteristics/2017CountyHealthRankingsData.xls", ///
	sheet("Ranked Measure Data") cellrange(A2:EZ3138) firstrow case(lower) clear
rename (fips yearsofpotentiallifelostrat) (cfips yearslife)
save "${intermediate_data}/cz_characteristics/_countyhealth.dta", replace

************************************

* County-level (based on locality level) Medicare payment adjustment
*
* Part 1 stores the county GPCI/GAF file keyed on cfips. Part 2 attaches it
* to the county-to-CZ crosswalk and collapses to one row per CZ.

* Part 1: import GPCIs and GAFs. Column 1 is read as a string.
import delimited using "${raw_data}/CountyGPCIsandGAFsMasterFile/Baseline GPCIs and GAFs.csv", ///
	clear stringcols(1)
isid fips 
rename fips cfips 
save "${intermediate_data}/cz_characteristics/_county_GPCIs.dta", replace

* Part 2: collapse to CZ level using RVU weights.
* NOTE(review): unlike the final merge section, the crosswalk is used here
* without recoding cfips 12025 to 12086; confirm that county still matches.
use "${intermediate_data}/crosswalks/cfips_cz_state_xwalk.dta", clear
rename cz90 cz
destring cz, replace
merge 1:1 cfips using "${intermediate_data}/cz_characteristics/_county_GPCIs.dta", ///
	keep(master match) nogenerate

* RVU-weighted mean of every baseline variable, plus the unweighted RVU total.
collapse (mean) baseline* (rawsum) rvu [aweight = rvu], by(cz)
isid cz 
save "${intermediate_data}/cz_characteristics/_CZ_GPCIs.dta", replace

************************************

* Price index (Diamond and Moretti, 2021)
*
* Keeps the Laspeyres indexes keyed on cz00 and exposes a3_laspeyres under
* the name cz_price_index_hi.

use "${raw_data}/price_index/price_indexes.dta", clear 
rename cz cz00
gen cz_price_index_hi = a3_laspeyres 
keep cz00 cz_name cz_price_index_hi *laspeyres
isid cz00 
save "${intermediate_data}/cz_characteristics/_cz_price_indexes.dta", replace


************************************

* CZ treatment effects from Finkelstein, Gentzkow, Williams (2021)
*
* Reads the EB-adjusted estimates, converts the CZ identifier to numeric,
* drops rows without an identifier, and saves the file keyed on cz00.

import excel using "${raw_data}/cz_characteristics/full_eb_estimates.xlsx", ///
	sheet("EB-Adjusted Estimates") firstrow case(lower) clear 

* "NA" is blanked first so that destring can convert the column.
replace czid = "" if czid == "NA"
destring czid, replace
drop if missing(czid)

rename czid cz00
rename adjustedtreatmenteffect adjtreat
label var adjtreat "Life Expectancy Treat. Effect"
save "${intermediate_data}/cz_characteristics/_czmortalityeffect.dta", replace

************************************

* Merge all CZ characteristics
*
* Starts from the county-to-CZ crosswalk, attaches the county-level and
* cz00-level inputs, collapses to one row per 1990 commuting zone using
* county population as the analytic weight, then adds the CZ-level files and
* saves the final selection of variables keyed on cz90.

use "${intermediate_data}/crosswalks/cfips_cz_state_xwalk.dta", clear

* Recode county 12025 to 12086 before the cfips merges.
* NOTE(review): presumably the Dade to Miami-Dade (FL) FIPS change; confirm.
replace cfips = "12086" if cfips == "12025"
merge 1:1 cfips using "${intermediate_data}/cz_characteristics/_countyhealthcare.dta", keep(master match) nogen
merge 1:1 cfips using "${intermediate_data}/cz_characteristics/_countyhealth.dta", keep(master match) nogen
merge m:1 cz00 using "${intermediate_data}/cz_characteristics/_cz_price_indexes.dta", keep(master match) nogen
merge m:1 cz00 using "${intermediate_data}/cz_characteristics/_czmortalityeffect.dta", keep(master match) nogen
rename cz90 cz
destring cz, replace

* Population-weighted means by CZ, plus unweighted population totals.
* Counties with missing population carry no weight and do not contribute.
collapse yearslife foodenvironmentindex preventablehosprate pcprate receivinghba1c mammography ///
	adjtreat age65le ruralurban *_pc cz_price_index_hi *laspeyres ///
	(rawsum) pop_over65 population [aw = population], by(cz)

* desirable_cz flags the top quartile of the price index. xtile leaves quants
* missing where the index is missing, and a missing value compared with 4
* evaluates to 0, so the flag is only defined where the quartile is known.
xtile quants = cz_price_index_hi, nq(4)
gen desirable_cz = (quants == 4) if !missing(quants)
drop quants

* After the collapse the data are unique on cz, so every CZ-level merge is 1:1.
merge 1:1 cz using "${raw_data}/cz_characteristics/health_ineq_online_table_10.dta", keep(master match) keepusing(exercise_any_q1 puninsured2010 reimb_penroll_adj10 frac_middleclass pop_density median_house_value cs_educ_ba hhinc00) nogen
 
merge 1:1 cz using "${raw_data}/cz_characteristics/health_ineq_online_table_6.dta", keep(master match) keepusing(le_agg_q1_F le_agg_q1_M count_q1_F count_q1_M) nogen

* Count-weighted average of the female and male q1 life expectancy.
gen le_agg_q1 = le_agg_q1_F * (count_q1_F/(count_q1_F + count_q1_M)) + le_agg_q1_M * (count_q1_M/(count_q1_F + count_q1_M))
drop *_F *_M

merge 1:1 cz using "${raw_data}/cz_characteristics/cz_covariates.dta", keep(master match) nogen
merge 1:1 cz using "${intermediate_data}/cz_characteristics/_CZ_GPCIs.dta", keep(master match) nogen
merge 1:1 cz using "${intermediate_data}/cz_characteristics/_cz_characteristics_panel_physicians.dta", keep(master match) nogen

rename cz cz90 
rename baselinesmoothedgaf GAF
label var adjtreat "Life Expectancy Treat. Effect"
label var le_agg_q1 "Life Expectancy"
gen logpopulation = log(population)

isid cz90

* Final variable list, defined once and used for both keep and order.
* The hyphenated range relies on the physician-panel variables being stored
* contiguously in the order in which they were created.
local final_vars cz90 czname count_docs_ally count_docs_2017 ///
	mean_ptotinc_allage_ally-obs_logaginc_4055_2017 ///
	ruralurban logpopulation pop_over65 pop_density hhinc00 hhinc_mean2000 med_hhinc1990 med_hhinc2016 ///
	pcprate nspecdocs_pc npcpdocs_pc medicaidelig_pc medicareelig_pc puninsured2010 ///
	reimb_penroll_adj10 frac_middleclass ///
	median_house_value cs_educ_ba job_growth_1990_2010 GAF ///
	cz_price_index_hi a0_laspeyres a1_laspeyres a2_laspeyres a3_laspeyres desirable_cz ///
	le_agg_q1 adjtreat age65le

keep `final_vars'
order `final_vars'

save "${intermediate_data}/cz_characteristics/CZ1990Characteristics.dta", replace
